Set the working dir
setwd("/mnt/picea/projects/arabidopsis/mschmid/porcupine-RNA-Seq")
Libs
suppressPackageStartupMessages(library(DESeq2))
suppressPackageStartupMessages(library(gplots))
suppressPackageStartupMessages(library(Glimma))
suppressPackageStartupMessages(library(LSD))
suppressPackageStartupMessages(library(org.At.tair.db))
suppressPackageStartupMessages(library(pander))
suppressPackageStartupMessages(library(RColorBrewer))
suppressPackageStartupMessages(library(VennDiagram))
Helper files
suppressMessages(source("~/Git/UPSCb/src/R/densityPlot.R"))
suppressMessages(source("~/Git/UPSCb/src/R/plotMA.R"))
suppressMessages(source("~/Git/UPSCb/src/R/volcanoPlot.R"))
suppressMessages(source("~/Git/UPSCb/src/R/plot.multidensity.R"))
Load saved data Read the sample information
samples <- read.csv("~/Git/UPSCb/projects/arabidopsis-porcupine-RNA-Seq/doc/RNA-seq_pcp_vs_Col-0_info.csv")
And extend it
samples$Genotype=sub(" .*","",samples$Description)
samples$Temp=ifelse(sub(".* ","",samples$Description)=="16",
ifelse(nchar(as.character(samples$Description))>16,"WC","C"),
ifelse(nchar(as.character(samples$Description))>16,"CW","W"))
Gene raw expression
cg <- read.csv("analysis/kallisto/combined-raw-unormalised-gene-expression_data.csv",
row.names=1)
Transcript raw expression
ct <- read.csv("analysis/kallisto/combined-raw-unormalised-transcript-expression_data.csv",
row.names=1)
Gene normalised expression
vst.g <- read.csv("analysis/kallisto/combined-library-size-normalized_variance-stabilized_gene-expression_data.csv",
row.names=1)
Transcript raw expression
vst.t <- read.csv("analysis/kallisto/combined-library-size-normalized_variance-stabilized_transcript-expression_data.csv",
row.names=1)
Setup graphics
pal=brewer.pal(8,"Dark2")
mar <- par("mar")
This is pcp
pcp <- "AT2G18740"
".runDE" <- function(dds, annot, exp, alpha=0.01, lfc=0.5, outdir=NULL, type=c("gene","transcript")){
# default
type <- match.arg(type)
if(type=="transcript"){
display.columns <- c("TranscriptID", "SYMBOL","GENENAME")
id.column <- "TranscriptID"
} else {
display.columns <- c("GeneID", "SYMBOL","GENENAME")
id.column <- "GeneID"
}
# Differential Expression
suppressMessages(dds <- DESeq(dds))
# contrast
ct <- gsub("condition","",paste(resultsNames(dds)[-1],collapse="-vs-"))
if(is.null(outdir)){
outdir <- ct
}
# Dispersion Estimation
plotDispEsts(dds)
# Results
res <- results(dds,as.list(resultsNames(dds)[-1]))
# Plot the Median vs Average
plotMA(res,alpha,lfc)
# Plot the log odds vs. log2 fold change
volcanoPlot(res,alpha=alpha,lfc=lfc)
# Plot the adjusted p-value histogram
hist(res$padj,breaks=seq(0,1,.01))
# Select genes below alpha and above lfc
sel <- res$padj<alpha & !is.na(res$padj) & abs(res$log2FoldChange) >= lfc
sprintf("There are %s genes differentially expressed at a %s FDR and a %s logFC cutoffs",
sum(sel),alpha,lfc)
# Write them out
dir.create(outdir,showWarnings=FALSE,recursive=TRUE)
write.csv(annot[sel,],
file=file.path(outdir,
paste(ct,alpha,"FDR",lfc,
"log2FC-cutoffs_significant-genes.csv",sep="-")))
write.csv(annot[sel,][sign(res$log2FoldChange[sel])==1,],
file=file.path(outdir,
paste(ct,alpha,"FDR",lfc,
"log2FC-cutoffs_up-regulated_significant-genes.csv",sep="-")))
write.csv(annot[sel,][sign(res$log2FoldChange[sel])==-1,],
file=file.path(outdir,
paste(ct,alpha,"FDR",lfc,
"log2FC-cutoffs_down-regulated_significant-genes.csv",sep="-")))
write.csv(cbind(as.data.frame(res),annot),
file=file.path(outdir,
paste0(ct,".csv")))
# Feature name
fname <- rownames(res)[sel]
# Cluster the VST expression of the genes
heatmap.2(as.matrix(exp[rownames(exp) %in% fname,colnames(exp) %in% dds$ID]),
scale="row",labRow=NA,trace="none",
RowSideColors=ifelse(sign(res[sel,"log2FoldChange"])==-1,
"darkorange",
"yellow"),
labCol=colData(dds)$condition)
# Interactive MDS and MA
res.df <- as.data.frame(res)
res.df$log10MeanNormCount <- log10(res.df$baseMean)
idx <- rowSums(counts(dds)) > 0
res.df <- res.df[idx,]
res.df$padj[is.na(res.df$padj)] <- 1
glMDPlot(res.df,
xval = "log10MeanNormCount",
yval = "log2FoldChange",
counts = counts(dds)[idx,],
anno = annot[idx,],
groups = dds$condition,
samples = paste(dds$condition,dds$replicate,sep="-"),
status = sel[idx],
display.columns = display.columns,
id.column = id.column,
path = outdir,
folder = paste0(ct,"-report"),launch=FALSE)
# Done
return(fname)
}
Re-order the samples
stopifnot(all(colnames(cg) == colnames(vst.g)))
stopifnot(all(colnames(ct) == colnames(vst.t)))
stopifnot(all(colnames(ct) == colnames(cg)))
colnames(ct) <- colnames(cg) <- colnames(vst.t) <- colnames(vst.g) <- sub("X","",colnames(cg))
samples <- samples[match(colnames(ct),samples$SampleID),]
df.annot <- data.frame(GeneID=rownames(cg),
SYMBOL=mapIds(org.At.tair.db,keys=rownames(cg),"SYMBOL","TAIR"),
GENENAME=mapIds(org.At.tair.db,keys=rownames(cg),"GENENAME","TAIR"))
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
tx.annot <- df.annot[match(sub("\\.[0-9]+","",rownames(vst.t)),df.annot$GeneID),]
row.names(tx.annot) <- rownames(vst.t)
tx.annot$TranscriptID <- rownames(vst.t)
tx.annot <- tx.annot[,c(4,1:3)]
outdir <- "analysis/DESeq2/Kallisto-gene/simple-contrasts/pcp"
sel <- samples$Genotype=="pcp" & samples$Temp %in% c("C","CW")
C.vs.CW <- .runDE(
DESeqDataSetFromMatrix(
countData = cg[,sel],
colData = data.frame(condition=samples$Temp[sel],
replicate=samples$Replicate[sel],
ID=samples$SampleID[sel]),
design = ~condition),
df.annot, vst.g, outdir=outdir)
sel <- samples$Genotype=="pcp" & samples$Temp %in% c("C","W")
C.vs.W <- .runDE(
DESeqDataSetFromMatrix(
countData = cg[,sel],
colData = data.frame(condition=samples$Temp[sel],
replicate=samples$Replicate[sel],
ID=samples$SampleID[sel]),
design = ~condition),
df.annot, vst.g, outdir=outdir)
sel <- samples$Genotype=="pcp" & samples$Temp %in% c("W","CW")
CW.vs.W <- .runDE(
DESeqDataSetFromMatrix(
countData = cg[,sel],
colData = data.frame(condition=samples$Temp[sel],
replicate=samples$Replicate[sel],
ID=samples$SampleID[sel]),
design = ~condition),
df.annot, vst.g, outdir=outdir)
sel <- samples$Genotype=="pcp" & samples$Temp %in% c("C","WC")
C.vs.WC <- .runDE(
DESeqDataSetFromMatrix(
countData = cg[,sel],
colData = data.frame(condition=samples$Temp[sel],
replicate=samples$Replicate[sel],
ID=samples$SampleID[sel]),
design = ~condition),
df.annot, vst.g, outdir=outdir)
sel <- samples$Genotype=="pcp" & samples$Temp %in% c("W","WC")
W.vs.WC <- .runDE(
DESeqDataSetFromMatrix(
countData = cg[,sel],
colData = data.frame(condition=samples$Temp[sel],
replicate=samples$Replicate[sel],
ID=samples$SampleID[sel]),
design = ~condition),
df.annot, vst.g, outdir=outdir)
plot.new()
grid.draw(venn.diagram(list(
C.vs.CW=C.vs.CW,
C.vs.W=C.vs.W,
CW.vs.W=CW.vs.W,
C.vs.WC=C.vs.WC,
W.vs.WC=W.vs.WC
),
filename=NULL,
col=pal[1:5],
category.names=c("C.vs.CW","C.vs.W","CW.vs.W","C.vs.WC","W.vs.WC")))
outdir <- "analysis/DESeq2/Kallisto-gene/simple-contrasts/Col-0"
sel <- samples$Genotype=="Col-0" & samples$Temp %in% c("C","CW")
C.vs.CW <- .runDE(
DESeqDataSetFromMatrix(
countData = cg[,sel],
colData = data.frame(condition=samples$Temp[sel],
replicate=samples$Replicate[sel],
ID=samples$SampleID[sel]),
design = ~condition),
df.annot, vst.g, outdir=outdir)
sel <- samples$Genotype=="Col-0" & samples$Temp %in% c("C","W")
C.vs.W <- .runDE(
DESeqDataSetFromMatrix(
countData = cg[,sel],
colData = data.frame(condition=samples$Temp[sel],
replicate=samples$Replicate[sel],
ID=samples$SampleID[sel]),
design = ~condition),
df.annot, vst.g, outdir=outdir)
sel <- samples$Genotype=="Col-0" & samples$Temp %in% c("W","CW")
CW.vs.W <- .runDE(
DESeqDataSetFromMatrix(
countData = cg[,sel],
colData = data.frame(condition=samples$Temp[sel],
replicate=samples$Replicate[sel],
ID=samples$SampleID[sel]),
design = ~condition),
df.annot, vst.g, outdir=outdir)
sel <- samples$Genotype=="Col-0" & samples$Temp %in% c("C","WC")
C.vs.WC <- .runDE(
DESeqDataSetFromMatrix(
countData = cg[,sel],
colData = data.frame(condition=samples$Temp[sel],
replicate=samples$Replicate[sel],
ID=samples$SampleID[sel]),
design = ~condition),
df.annot, vst.g, outdir=outdir)
sel <- samples$Genotype=="Col-0" & samples$Temp %in% c("W","WC")
W.vs.WC <- .runDE(
DESeqDataSetFromMatrix(
countData = cg[,sel],
colData = data.frame(condition=samples$Temp[sel],
replicate=samples$Replicate[sel],
ID=samples$SampleID[sel]),
design = ~condition),
df.annot, vst.g, outdir=outdir)
plot.new()
grid.draw(venn.diagram(list(
C.vs.CW=C.vs.CW,
C.vs.W=C.vs.W,
CW.vs.W=CW.vs.W,
C.vs.WC=C.vs.WC,
W.vs.WC=W.vs.WC
),
filename=NULL,
col=pal[1:5],
category.names=c("C.vs.CW","C.vs.W","CW.vs.W","C.vs.WC","W.vs.WC")))
outdir <- "analysis/DESeq2/Kallisto-gene/simple-contrasts/Cold"
sel <- samples$Temp %in% "C"
Col0.vs.pcp.C <- .runDE(
DESeqDataSetFromMatrix(
countData = cg[,sel],
colData = data.frame(condition=samples$Genotype[sel],
replicate=samples$Replicate[sel],
ID=samples$SampleID[sel]),
design = ~condition),
df.annot, vst.g, outdir=outdir)
outdir <- "analysis/DESeq2/Kallisto-gene/simple-contrasts/Warm"
sel <- samples$Temp %in% "W"
Col0.vs.pcp.W <- .runDE(
DESeqDataSetFromMatrix(
countData = cg[,sel],
colData = data.frame(condition=samples$Genotype[sel],
replicate=samples$Replicate[sel],
ID=samples$SampleID[sel]),
design = ~condition),
df.annot, vst.g, outdir=outdir)
plot.new()
grid.draw(venn.diagram(list(
Col0.vs.pcp.C=Col0.vs.pcp.C,
Col0.vs.pcp.W=Col0.vs.pcp.W
),
filename=NULL,
col=pal[1:2],
category.names=c("Cold","Warm")))
outdir <- "analysis/DESeq2/Kallisto-transcript/simple-contrasts/pcp"
sel <- samples$Genotype=="pcp" & samples$Temp %in% c("C","CW")
C.vs.CW <- .runDE(
DESeqDataSetFromMatrix(
countData = ct[,sel],
colData = data.frame(condition=samples$Temp[sel],
replicate=samples$Replicate[sel],
ID=samples$SampleID[sel]),
design = ~condition),
tx.annot, vst.t, outdir=outdir,type="transcript")
sel <- samples$Genotype=="pcp" & samples$Temp %in% c("C","W")
C.vs.W <- .runDE(
DESeqDataSetFromMatrix(
countData = ct[,sel],
colData = data.frame(condition=samples$Temp[sel],
replicate=samples$Replicate[sel],
ID=samples$SampleID[sel]),
design = ~condition),
tx.annot, vst.t, outdir=outdir,type="transcript")
sel <- samples$Genotype=="pcp" & samples$Temp %in% c("W","CW")
CW.vs.W <- .runDE(
DESeqDataSetFromMatrix(
countData = ct[,sel],
colData = data.frame(condition=samples$Temp[sel],
replicate=samples$Replicate[sel],
ID=samples$SampleID[sel]),
design = ~condition),
tx.annot, vst.t, outdir=outdir,type="transcript")
sel <- samples$Genotype=="pcp" & samples$Temp %in% c("C","WC")
C.vs.WC <- .runDE(
DESeqDataSetFromMatrix(
countData = ct[,sel],
colData = data.frame(condition=samples$Temp[sel],
replicate=samples$Replicate[sel],
ID=samples$SampleID[sel]),
design = ~condition),
tx.annot, vst.t, outdir=outdir,type="transcript")
sel <- samples$Genotype=="pcp" & samples$Temp %in% c("W","WC")
W.vs.WC <- .runDE(
DESeqDataSetFromMatrix(
countData = ct[,sel],
colData = data.frame(condition=samples$Temp[sel],
replicate=samples$Replicate[sel],
ID=samples$SampleID[sel]),
design = ~condition),
tx.annot, vst.t, outdir=outdir,type="transcript")
plot.new()
grid.draw(venn.diagram(list(
C.vs.CW=C.vs.CW,
C.vs.W=C.vs.W,
CW.vs.W=CW.vs.W,
C.vs.WC=C.vs.WC,
W.vs.WC=W.vs.WC
),
filename=NULL,
col=pal[1:5],
category.names=c("C.vs.CW","C.vs.W","CW.vs.W","C.vs.WC","W.vs.WC")))
outdir <- "analysis/DESeq2/Kallisto-transcript/simple-contrasts/Col-0"
sel <- samples$Genotype=="Col-0" & samples$Temp %in% c("C","CW")
C.vs.CW <- .runDE(
DESeqDataSetFromMatrix(
countData = ct[,sel],
colData = data.frame(condition=samples$Temp[sel],
replicate=samples$Replicate[sel],
ID=samples$SampleID[sel]),
design = ~condition),
tx.annot, vst.t, outdir=outdir,type="transcript")
sel <- samples$Genotype=="Col-0" & samples$Temp %in% c("C","W")
C.vs.W <- .runDE(
DESeqDataSetFromMatrix(
countData = ct[,sel],
colData = data.frame(condition=samples$Temp[sel],
replicate=samples$Replicate[sel],
ID=samples$SampleID[sel]),
design = ~condition),
tx.annot, vst.t, outdir=outdir,type="transcript")
sel <- samples$Genotype=="Col-0" & samples$Temp %in% c("W","CW")
C.vs.CW <- .runDE(
DESeqDataSetFromMatrix(
countData = ct[,sel],
colData = data.frame(condition=samples$Temp[sel],
replicate=samples$Replicate[sel],
ID=samples$SampleID[sel]),
design = ~condition),
tx.annot, vst.t, outdir=outdir,type="transcript")
sel <- samples$Genotype=="Col-0" & samples$Temp %in% c("C","WC")
C.vs.WC <- .runDE(
DESeqDataSetFromMatrix(
countData = ct[,sel],
colData = data.frame(condition=samples$Temp[sel],
replicate=samples$Replicate[sel],
ID=samples$SampleID[sel]),
design = ~condition),
tx.annot, vst.t, outdir=outdir,type="transcript")
sel <- samples$Genotype=="Col-0" & samples$Temp %in% c("W","WC")
W.vs.WC <- .runDE(
DESeqDataSetFromMatrix(
countData = ct[,sel],
colData = data.frame(condition=samples$Temp[sel],
replicate=samples$Replicate[sel],
ID=samples$SampleID[sel]),
design = ~condition),
tx.annot, vst.t, outdir=outdir,type="transcript")
plot.new()
grid.draw(venn.diagram(list(
C.vs.CW=C.vs.CW,
C.vs.W=C.vs.W,
CW.vs.W=CW.vs.W,
C.vs.WC=C.vs.WC,
W.vs.WC=W.vs.WC
),
filename=NULL,
col=pal[1:5],
category.names=c("C.vs.CW","C.vs.W","CW.vs.W","C.vs.WC","W.vs.WC")))
outdir <- "analysis/DESeq2/Kallisto-transcript/simple-contrasts/Cold"
sel <- samples$Temp %in% "C"
Col0.vs.pcp.C <- .runDE(
DESeqDataSetFromMatrix(
countData = ct[,sel],
colData = data.frame(condition=samples$Genotype[sel],
replicate=samples$Replicate[sel],
ID=samples$SampleID[sel]),
design = ~condition),
tx.annot, vst.t, outdir=outdir,type="transcript")
outdir <- "analysis/DESeq2/Kallisto-transcript/simple-contrasts/Warm"
sel <- samples$Temp %in% "W"
Col0.vs.pcp.W <- .runDE(
DESeqDataSetFromMatrix(
countData = ct[,sel],
colData = data.frame(condition=samples$Genotype[sel],
replicate=samples$Replicate[sel],
ID=samples$SampleID[sel]),
design = ~condition),
tx.annot, vst.t, outdir=outdir,type="transcript")
plot.new()
grid.draw(venn.diagram(list(
Col0.vs.pcp.C=Col0.vs.pcp.C,
Col0.vs.pcp.W=Col0.vs.pcp.W
),
filename=NULL,
col=pal[1:2],
category.names=c("Cold","Warm")))
## R version 3.3.2 (2016-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.1 LTS
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] VennDiagram_1.6.17 futile.logger_1.4.3
## [3] RColorBrewer_1.1-2 pander_0.6.0
## [5] org.At.tair.db_3.4.0 AnnotationDbi_1.36.0
## [7] LSD_3.0 Glimma_1.2.1
## [9] gplots_3.0.1 DESeq2_1.14.1
## [11] SummarizedExperiment_1.4.0 Biobase_2.34.0
## [13] GenomicRanges_1.26.1 GenomeInfoDb_1.10.1
## [15] IRanges_2.8.1 S4Vectors_0.12.1
## [17] BiocGenerics_0.20.0
##
## loaded via a namespace (and not attached):
## [1] base64_2.0 Rcpp_0.12.8 locfit_1.5-9.1
## [4] lattice_0.20-34 gtools_3.5.0 assertthat_0.1
## [7] rprojroot_1.1 digest_0.6.11 plyr_1.8.4
## [10] futile.options_1.0.0 backports_1.0.4 acepack_1.4.1
## [13] RSQLite_1.1-1 evaluate_0.10 ggplot2_2.2.1
## [16] zlibbioc_1.20.0 lazyeval_0.2.0 data.table_1.10.0
## [19] annotate_1.52.0 gdata_2.17.0 rpart_4.1-10
## [22] Matrix_1.2-7.1 rmarkdown_1.2 splines_3.3.2
## [25] BiocParallel_1.8.1 geneplotter_1.52.0 stringr_1.1.0
## [28] foreign_0.8-67 RCurl_1.95-4.8 munsell_0.4.3
## [31] htmltools_0.3.5 nnet_7.3-12 openssl_0.9.5
## [34] tibble_1.2 gridExtra_2.2.1 htmlTable_1.7
## [37] edgeR_3.16.5 Hmisc_4.0-1 XML_3.98-1.5
## [40] bitops_1.0-6 xtable_1.8-2 gtable_0.2.0
## [43] DBI_0.5-1 magrittr_1.5 scales_0.4.1
## [46] KernSmooth_2.23-15 stringi_1.1.2 XVector_0.14.0
## [49] genefilter_1.56.0 limma_3.30.7 latticeExtra_0.6-28
## [52] Formula_1.2-1 lambda.r_1.1.9 tools_3.3.2
## [55] survival_2.40-1 yaml_2.1.14 colorspace_1.3-2
## [58] cluster_2.0.5 caTools_1.17.1 memoise_1.0.0
## [61] knitr_1.15.1